{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Models Accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dependency parsing\n",
    "\n",
    "Trained on 80% of the dataset, tested on 20% of the dataset. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/dependency](https://github.com/huseinzol05/Malaya/tree/master/session/dependency)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "arc accuracy: 0.8554239102233114\n",
    "types accuracy: 0.8481064607232274\n",
    "root accuracy: 0.9203253968253969\n",
    "\n",
    "               precision    recall  f1-score   support\n",
    "\n",
    "          PAD    0.99996   1.00000   0.99998    877864\n",
    "            X    1.00000   0.99986   0.99993    145204\n",
    "          acl    0.96111   0.96190   0.96150      6037\n",
    "        advcl    0.94287   0.93895   0.94091      2408\n",
    "       advmod    0.97171   0.96904   0.97037      9464\n",
    "         amod    0.96283   0.94008   0.95132      8128\n",
    "        appos    0.97426   0.95940   0.96677      4852\n",
    "          aux    1.00000   0.50000   0.66667         4\n",
    "         case    0.98907   0.98834   0.98870     21519\n",
    "           cc    0.98089   0.98708   0.98397      6500\n",
    "        ccomp    0.95515   0.92164   0.93810       855\n",
    "     compound    0.95432   0.96565   0.95995     13479\n",
    "compound:plur    0.96507   0.97778   0.97138      1215\n",
    "         conj    0.96943   0.98036   0.97486      8604\n",
    "          cop    0.96407   0.98531   0.97457      1906\n",
    "        csubj    0.92157   0.85455   0.88679        55\n",
    "   csubj:pass    0.93750   0.78947   0.85714        19\n",
    "          dep    0.95199   0.93574   0.94380       996\n",
    "          det    0.97043   0.96678   0.96860      8248\n",
    "        fixed    0.94176   0.93672   0.93923      1122\n",
    "         flat    0.98010   0.98217   0.98113     20755\n",
    "         iobj    0.87500   0.80000   0.83582        35\n",
    "         mark    0.94507   0.97448   0.95955      2860\n",
    "         nmod    0.96363   0.95912   0.96137      8121\n",
    "        nsubj    0.97076   0.97091   0.97083     12788\n",
    "   nsubj:pass    0.95192   0.96362   0.95774      3986\n",
    "       nummod    0.98563   0.97942   0.98251      7773\n",
    "          obj    0.96915   0.97071   0.96993     10551\n",
    "          obl    0.97549   0.97164   0.97356     11389\n",
    "    parataxis    0.95038   0.90415   0.92669       699\n",
    "        punct    0.99752   0.99773   0.99762     33438\n",
    "         root    0.98046   0.98124   0.98085     10073\n",
    "        xcomp    0.95153   0.94749   0.94951      2590\n",
    "\n",
    "     accuracy                        0.99562   1243537\n",
    "    macro avg    0.96396   0.93822   0.94823   1243537\n",
    " weighted avg    0.99562   0.99562   0.99562   1243537\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "arc accuracy: 0.7189048051328787\n",
    "types accuracy: 0.6942783162846734\n",
    "root accuracy: 0.8860992063492065\n",
    "\n",
    "               precision    recall  f1-score   support\n",
    "\n",
    "          PAD    0.99996   1.00000   0.99998    943088\n",
    "            X    0.99999   0.99981   0.99990    145797\n",
    "          acl    0.85006   0.80040   0.82448      6042\n",
    "        advcl    0.61783   0.60566   0.61169      2437\n",
    "       advmod    0.86865   0.86755   0.86810      9513\n",
    "         amod    0.82596   0.78837   0.80672      8217\n",
    "        appos    0.84113   0.79100   0.81530      5000\n",
    "          aux    0.80000   0.50000   0.61538         8\n",
    "         case    0.94714   0.95046   0.94879     21376\n",
    "           cc    0.92151   0.94487   0.93304      6349\n",
    "        ccomp    0.59326   0.26201   0.36349       874\n",
    "     compound    0.85764   0.83530   0.84632     13667\n",
    "compound:plur    0.83743   0.91349   0.87381      1156\n",
    "         conj    0.87306   0.90624   0.88934      8500\n",
    "          cop    0.90592   0.93670   0.92105      1943\n",
    "        csubj    0.75000   0.05263   0.09836        57\n",
    "   csubj:pass    0.00000   0.00000   0.00000        16\n",
    "          dep    0.66704   0.55176   0.60395      1082\n",
    "          det    0.89147   0.84818   0.86929      7970\n",
    "        fixed    0.80819   0.61696   0.69975      1120\n",
    "         flat    0.90396   0.93947   0.92137     21129\n",
    "         iobj    0.00000   0.00000   0.00000        25\n",
    "         mark    0.74718   0.83845   0.79019      2767\n",
    "         nmod    0.86083   0.78159   0.81930      8017\n",
    "        nsubj    0.85174   0.89750   0.87402     12712\n",
    "   nsubj:pass    0.78514   0.82246   0.80337      4061\n",
    "       nummod    0.88943   0.93509   0.91169      8026\n",
    "          obj    0.89982   0.84423   0.87114     10618\n",
    "          obl    0.84081   0.88283   0.86131     11385\n",
    "    parataxis    0.48635   0.26667   0.34446       735\n",
    "        punct    0.98350   0.99126   0.98736     33736\n",
    "         root    0.91085   0.93726   0.92387     10073\n",
    "        xcomp    0.69305   0.76415   0.72686      2544\n",
    "\n",
    "     accuracy                        0.98102   1310040\n",
    "    macro avg    0.77906   0.72946   0.74011   1310040\n",
    " weighted avg    0.98076   0.98102   0.98073   1310040\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "arc accuracy: 0.8118309576064845\n",
    "types accuracy: 0.7931625589721538\n",
    "root accuracy: 0.879281746031746\n",
    "\n",
    "               precision    recall  f1-score   support\n",
    "\n",
    "          PAD    1.00000   1.00000   1.00000    905035\n",
    "            X    0.99997   0.99998   0.99998    159607\n",
    "          acl    0.89111   0.88994   0.89052      6051\n",
    "        advcl    0.75213   0.78003   0.76583      2373\n",
    "       advmod    0.89975   0.92642   0.91289      9378\n",
    "         amod    0.86607   0.87808   0.87204      8145\n",
    "        appos    0.87914   0.89496   0.88698      4779\n",
    "          aux    1.00000   0.37500   0.54545         8\n",
    "         case    0.96890   0.97142   0.97016     21521\n",
    "           cc    0.96049   0.96393   0.96221      6405\n",
    "        ccomp    0.70574   0.67583   0.69046       873\n",
    "     compound    0.88800   0.89660   0.89228     13530\n",
    "compound:plur    0.93381   0.93981   0.93680      1246\n",
    "         conj    0.94147   0.93436   0.93790      8608\n",
    "          cop    0.94652   0.96651   0.95641      1941\n",
    "        csubj    0.75000   0.39623   0.51852        53\n",
    "   csubj:pass    0.77778   0.77778   0.77778         9\n",
    "          dep    0.81778   0.72871   0.77068      1010\n",
    "          det    0.91665   0.90606   0.91132      8314\n",
    "        fixed    0.87862   0.80565   0.84055      1168\n",
    "         flat    0.96177   0.93608   0.94875     20400\n",
    "         iobj    0.71429   0.42857   0.53571        35\n",
    "         mark    0.88640   0.88577   0.88608      2854\n",
    "         nmod    0.86857   0.90150   0.88473      8020\n",
    "        nsubj    0.89466   0.93382   0.91382     12633\n",
    "   nsubj:pass    0.91977   0.81904   0.86648      4045\n",
    "       nummod    0.95316   0.95864   0.95589      8003\n",
    "          obj    0.90795   0.92092   0.91439     10357\n",
    "          obl    0.93016   0.90607   0.91796     11466\n",
    "    parataxis    0.72669   0.62953   0.67463       718\n",
    "        punct    0.99482   0.99724   0.99603     33312\n",
    "         root    0.93869   0.94093   0.93981     10073\n",
    "        xcomp    0.85300   0.80468   0.82813      2524\n",
    "\n",
    "     accuracy                        0.98785   1284494\n",
    "    macro avg    0.88860   0.84152   0.85761   1284494\n",
    " weighted avg    0.98786   0.98785   0.98782   1284494\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "arc accuracy: 0.7087220659183397\n",
    "types accuracy: 0.6735055899028873\n",
    "root accuracy: 0.8178452380952382\n",
    "\n",
    "               precision    recall  f1-score   support\n",
    "\n",
    "          PAD    1.00000   1.00000   1.00000    901404\n",
    "            X    0.99997   0.99998   0.99997    158217\n",
    "          acl    0.74523   0.72259   0.73374      6056\n",
    "        advcl    0.44763   0.44416   0.44589      2319\n",
    "       advmod    0.80839   0.80245   0.80541      9537\n",
    "         amod    0.74481   0.69167   0.71726      8144\n",
    "        appos    0.71137   0.68084   0.69577      4963\n",
    "          aux    0.00000   0.00000   0.00000         9\n",
    "         case    0.90625   0.93745   0.92159     21056\n",
    "           cc    0.92435   0.90888   0.91655      6453\n",
    "        ccomp    0.32162   0.13918   0.19429       855\n",
    "     compound    0.76535   0.75323   0.75924     13008\n",
    "compound:plur    0.76103   0.77066   0.76581      1186\n",
    "         conj    0.79454   0.78507   0.78978      8640\n",
    "          cop    0.87581   0.90736   0.89130      1943\n",
    "        csubj    0.66667   0.04082   0.07692        49\n",
    "   csubj:pass    0.00000   0.00000   0.00000        18\n",
    "          dep    0.41637   0.38321   0.39910       929\n",
    "          det    0.81424   0.77924   0.79636      7909\n",
    "        fixed    0.63932   0.41054   0.50000      1101\n",
    "         flat    0.85963   0.91321   0.88561     20856\n",
    "         iobj    1.00000   0.03333   0.06452        30\n",
    "         mark    0.69997   0.72039   0.71003      2879\n",
    "         nmod    0.71129   0.68985   0.70041      7964\n",
    "        nsubj    0.74144   0.81233   0.77527     12719\n",
    "   nsubj:pass    0.68649   0.56466   0.61964      3905\n",
    "       nummod    0.84427   0.87244   0.85813      7581\n",
    "          obj    0.79591   0.78073   0.78825     10380\n",
    "          obl    0.75820   0.78392   0.77085     11144\n",
    "    parataxis    0.25150   0.06231   0.09988       674\n",
    "        punct    0.98207   0.98323   0.98265     33034\n",
    "         root    0.84186   0.87362   0.85745     10073\n",
    "        xcomp    0.62652   0.63961   0.63300      2489\n",
    "\n",
    "     accuracy                        0.96997   1277524\n",
    "    macro avg    0.70128   0.63294   0.64105   1277524\n",
    " weighted avg    0.96929   0.96997   0.96946   1277524\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "arc accuracy: 0.9310084738376598\n",
    "types accuracy: 0.9258795751889828\n",
    "root accuracy: 0.9474206349206349\n",
    "\n",
    "               precision    recall  f1-score   support\n",
    "\n",
    "          PAD    0.99998   1.00000   0.99999    632972\n",
    "            X    1.00000   0.99997   0.99999    143586\n",
    "          acl    0.98091   0.98226   0.98158      5806\n",
    "        advcl    0.97098   0.95161   0.96120      2356\n",
    "       advmod    0.98802   0.97806   0.98302      9527\n",
    "         amod    0.95966   0.97100   0.96530      8208\n",
    "        appos    0.98846   0.98947   0.98896      4936\n",
    "          aux    1.00000   1.00000   1.00000        10\n",
    "         case    0.99454   0.99110   0.99282     21128\n",
    "           cc    0.98704   0.99518   0.99109      6429\n",
    "        ccomp    0.89091   0.97313   0.93021       856\n",
    "     compound    0.98091   0.96643   0.97362     13079\n",
    "compound:plur    0.99068   0.98401   0.98733      1188\n",
    "         conj    0.98303   0.99214   0.98756      8524\n",
    "          cop    0.98664   0.99071   0.98867      1938\n",
    "        csubj    0.96000   0.96000   0.96000        50\n",
    "   csubj:pass    0.95652   0.91667   0.93617        24\n",
    "          dep    0.98182   0.96716   0.97444      1005\n",
    "          det    0.98698   0.97756   0.98225      8065\n",
    "        fixed    0.96071   0.97162   0.96613      1057\n",
    "         flat    0.98389   0.99064   0.98726     20411\n",
    "         iobj    0.96154   0.80645   0.87719        31\n",
    "         mark    0.96611   0.98539   0.97565      2806\n",
    "         nmod    0.97956   0.97285   0.97619      8030\n",
    "        nsubj    0.98317   0.98402   0.98359     12701\n",
    "   nsubj:pass    0.96930   0.97858   0.97392      3969\n",
    "       nummod    0.99113   0.99327   0.99220      7879\n",
    "          obj    0.98266   0.98076   0.98171     10342\n",
    "          obl    0.98468   0.98256   0.98362     11183\n",
    "    parataxis    0.95595   0.95455   0.95525       682\n",
    "        punct    0.99952   0.99949   0.99950     33107\n",
    "         root    0.98888   0.98888   0.98888     10073\n",
    "        xcomp    0.95951   0.96027   0.95989      2517\n",
    "\n",
    "     accuracy                        0.99678    994475\n",
    "    macro avg    0.97738   0.97381   0.97531    994475\n",
    " weighted avg    0.99679   0.99678   0.99678    994475\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "arc accuracy: 0.8943757029483008\n",
    "types accuracy: 0.88690168487317\n",
    "root accuracy: 0.9425595238095238\n",
    "\n",
    "               precision    recall  f1-score   support\n",
    "\n",
    "          PAD    0.99999   1.00000   0.99999    644667\n",
    "            X    0.99998   0.99999   0.99998    144988\n",
    "          acl    0.95995   0.96137   0.96066      6058\n",
    "        advcl    0.91687   0.93839   0.92751      2386\n",
    "       advmod    0.97160   0.97620   0.97389      9496\n",
    "         amod    0.95264   0.94761   0.95012      8342\n",
    "        appos    0.97560   0.97638   0.97599      4995\n",
    "          aux    1.00000   1.00000   1.00000         6\n",
    "         case    0.99147   0.98685   0.98916     21680\n",
    "           cc    0.97523   0.99377   0.98441      6418\n",
    "        ccomp    0.95249   0.90112   0.92610       890\n",
    "     compound    0.95478   0.95656   0.95567     13399\n",
    "compound:plur    0.97575   0.98067   0.97821      1190\n",
    "         conj    0.96575   0.98929   0.97738      8494\n",
    "          cop    0.98201   0.98708   0.98454      1935\n",
    "        csubj    1.00000   0.90476   0.95000        42\n",
    "   csubj:pass    0.91667   0.91667   0.91667        12\n",
    "          dep    0.96490   0.94781   0.95628      1073\n",
    "          det    0.96461   0.97375   0.96916      8230\n",
    "        fixed    0.95762   0.92188   0.93941      1152\n",
    "         flat    0.98208   0.98030   0.98119     20967\n",
    "         iobj    1.00000   0.82927   0.90667        41\n",
    "         mark    0.96463   0.95609   0.96034      2824\n",
    "         nmod    0.96933   0.95492   0.96207      8207\n",
    "        nsubj    0.97533   0.97086   0.97309     12867\n",
    "   nsubj:pass    0.95811   0.94145   0.94970      3911\n",
    "       nummod    0.98952   0.98590   0.98770      7659\n",
    "          obj    0.97249   0.96839   0.97044     10440\n",
    "          obl    0.97129   0.97222   0.97175     11483\n",
    "    parataxis    0.95691   0.91348   0.93469       705\n",
    "        punct    0.99883   0.99955   0.99919     33252\n",
    "         root    0.98284   0.98372   0.98328     10073\n",
    "        xcomp    0.92520   0.94988   0.93738      2474\n",
    "\n",
    "     accuracy                        0.99475   1010356\n",
    "    macro avg    0.97044   0.95958   0.96462   1010356\n",
    " weighted avg    0.99476   0.99475   0.99475   1010356\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Emotion Analysis\n",
    "\n",
    "Trained on 80% of the dataset, tested on 20% of the dataset. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/emotion](https://github.com/huseinzol05/Malaya/tree/master/session/emotion)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### multinomial\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.88832   0.90889   0.89848      5872\n",
    "        fear    0.89515   0.88078   0.88791      4110\n",
    "       happy    0.88992   0.92776   0.90845      6091\n",
    "        love    0.92420   0.90616   0.91509      4252\n",
    "     sadness    0.91943   0.87356   0.89591      5212\n",
    "    surprise    0.92340   0.92838   0.92588      2597\n",
    "\n",
    "    accuracy                        0.90371     28134\n",
    "   macro avg    0.90674   0.90426   0.90529     28134\n",
    "weighted avg    0.90409   0.90371   0.90366     28134\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.99712   0.99763   0.99737      5895\n",
    "        fear    0.99687   0.99759   0.99723      4150\n",
    "       happy    0.99900   0.99900   0.99900      6017\n",
    "        love    0.99855   0.99615   0.99735      4154\n",
    "     sadness    0.99793   0.99906   0.99849      5307\n",
    "    surprise    0.99770   0.99694   0.99732      2612\n",
    "\n",
    "    accuracy                        0.99790     28135\n",
    "   macro avg    0.99786   0.99773   0.99779     28135\n",
    "weighted avg    0.99790   0.99790   0.99790     28135\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.99765   0.99481   0.99623      5970\n",
    "        fear    0.99607   0.99656   0.99631      4068\n",
    "       happy    0.99671   0.99918   0.99794      6062\n",
    "        love    0.99758   0.99638   0.99698      4145\n",
    "     sadness    0.99736   0.99793   0.99764      5303\n",
    "    surprise    0.99614   0.99691   0.99652      2587\n",
    "\n",
    "    accuracy                        0.99701     28135\n",
    "   macro avg    0.99692   0.99696   0.99694     28135\n",
    "weighted avg    0.99702   0.99701   0.99701     28135\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.99785   0.99472   0.99628      6062\n",
    "        fear    0.99582   0.99926   0.99754      4056\n",
    "       happy    0.99866   0.99866   0.99866      5988\n",
    "        love    0.99712   0.99760   0.99736      4162\n",
    "     sadness    0.99813   0.99813   0.99813      5334\n",
    "    surprise    0.99685   0.99803   0.99744      2533\n",
    "\n",
    "    accuracy                        0.99758     28135\n",
    "   macro avg    0.99740   0.99773   0.99757     28135\n",
    "weighted avg    0.99758   0.99758   0.99758     28135\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.99396   0.98603   0.98998      6012\n",
    "        fear    0.99390   0.99512   0.99451      4096\n",
    "       happy    0.99652   0.99652   0.99652      6030\n",
    "        love    0.99114   0.99187   0.99150      4059\n",
    "     sadness    0.99121   0.99699   0.99409      5316\n",
    "    surprise    0.99278   0.99619   0.99448      2622\n",
    "\n",
    "    accuracy                        0.99346     28135\n",
    "   macro avg    0.99325   0.99378   0.99351     28135\n",
    "weighted avg    0.99346   0.99346   0.99346     28135\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.99699   0.99733   0.99716      5983\n",
    "        fear    0.99778   0.99827   0.99802      4045\n",
    "       happy    0.99883   0.99850   0.99867      6005\n",
    "        love    0.99718   0.99625   0.99671      4261\n",
    "     sadness    0.99754   0.99773   0.99764      5288\n",
    "    surprise    0.99804   0.99843   0.99824      2553\n",
    "\n",
    "    accuracy                        0.99773     28135\n",
    "   macro avg    0.99773   0.99775   0.99774     28135\n",
    "weighted avg    0.99773   0.99773   0.99773     28135\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       anger    0.99669   0.99439   0.99554      6065\n",
    "        fear    0.99702   0.99727   0.99714      4027\n",
    "       happy    0.99764   0.99949   0.99857      5918\n",
    "        love    0.99554   0.99694   0.99624      4250\n",
    "     sadness    0.99867   0.99641   0.99754      5286\n",
    "    surprise    0.99422   0.99730   0.99576      2589\n",
    "\n",
    "    accuracy                        0.99691     28135\n",
    "   macro avg    0.99663   0.99697   0.99680     28135\n",
    "weighted avg    0.99691   0.99691   0.99691     28135\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Entities Recognition\n",
    "\n",
    "Trained on 80% of the dataset, tested on 20% of the dataset. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/entities](https://github.com/huseinzol05/Malaya/tree/master/session/entities)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       OTHER    0.99224   0.99931   0.99576   5160854\n",
    "         PAD    1.00000   1.00000   1.00000    877767\n",
    "           X    0.99995   1.00000   0.99998   2921053\n",
    "       event    0.99911   0.88679   0.93961    143787\n",
    "         law    0.99704   0.97040   0.98354    146950\n",
    "    location    0.98677   0.98420   0.98548    428869\n",
    "organization    0.99335   0.95355   0.97304    694150\n",
    "      person    0.97636   0.99476   0.98547    507960\n",
    "    quantity    0.99965   0.99803   0.99884     88200\n",
    "        time    0.98462   0.99938   0.99194    179880\n",
    "\n",
    "    accuracy                        0.99406  11149470\n",
    "   macro avg    0.99291   0.97864   0.98537  11149470\n",
    "weighted avg    0.99409   0.99406   0.99400  11149470\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       OTHER    0.98178   0.99946   0.99054   5160854\n",
    "         PAD    1.00000   1.00000   1.00000   1673627\n",
    "           X    1.00000   1.00000   1.00000   2921053\n",
    "       event    0.99666   0.70215   0.82388    143787\n",
    "         law    0.99522   0.94921   0.97167    146950\n",
    "    location    0.96753   0.96547   0.96650    428869\n",
    "organization    0.99403   0.87009   0.92794    694150\n",
    "      person    0.92771   0.99283   0.95917    507960\n",
    "    quantity    0.99643   0.99762   0.99703     88200\n",
    "        time    0.95574   0.99855   0.97668    179880\n",
    "\n",
    "    accuracy                        0.98642  11945330\n",
    "   macro avg    0.98151   0.94754   0.96134  11945330\n",
    "weighted avg    0.98675   0.98642   0.98594  11945330\n",
     "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       OTHER    0.98087   0.99948   0.99008   5160854\n",
    "         PAD    1.00000   1.00000   1.00000    881183\n",
    "           X    0.99996   1.00000   0.99998   2933007\n",
    "       event    0.99021   0.80012   0.88507    143787\n",
    "         law    0.96373   0.94234   0.95291    146950\n",
    "    location    0.97388   0.96256   0.96819    428869\n",
    "organization    0.99506   0.83927   0.91055    694150\n",
    "      person    0.91340   0.99378   0.95189    507960\n",
    "    quantity    0.99636   0.99704   0.99670     88200\n",
    "        time    0.98911   0.99859   0.99383    179880\n",
    "\n",
    "    accuracy                        0.98466  11164840\n",
    "   macro avg    0.98026   0.95332   0.96492  11164840\n",
    "weighted avg    0.98509   0.98466   0.98421  11164840\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       OTHER    0.96614   0.99651   0.98109   5160854\n",
    "         PAD    1.00000   1.00000   1.00000    881183\n",
    "           X    0.99984   1.00000   0.99992   2933007\n",
    "       event    0.97661   0.52453   0.68250    143787\n",
    "         law    0.97992   0.89007   0.93284    146950\n",
    "    location    0.92117   0.91206   0.91659    428869\n",
    "organization    0.96821   0.76413   0.85414    694150\n",
    "      person    0.87211   0.97366   0.92009    507960\n",
    "    quantity    0.98545   0.99220   0.98881     88200\n",
    "        time    0.94056   0.98312   0.96137    179880\n",
    "\n",
    "    accuracy                        0.97124  11164840\n",
    "   macro avg    0.96100   0.90363   0.92374  11164840\n",
    "weighted avg    0.97185   0.97124   0.96965  11164840\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       OTHER    0.98873   0.99965   0.99416   5160854\n",
    "         PAD    1.00000   1.00000   1.00000    877767\n",
    "           X    0.99999   1.00000   0.99999   2921053\n",
    "       event    0.99404   0.93677   0.96456    143787\n",
    "         law    0.99734   0.98832   0.99281    146950\n",
    "    location    0.99189   0.97927   0.98554    428869\n",
    "organization    0.99785   0.92433   0.95968    694150\n",
    "      person    0.97446   0.98956   0.98195    507960\n",
    "    quantity    0.99861   0.99875   0.99868     88200\n",
    "        time    0.99153   0.99872   0.99511    179880\n",
    "\n",
    "    accuracy                        0.99285  11149470\n",
    "   macro avg    0.99344   0.98154   0.98725  11149470\n",
    "weighted avg    0.99291   0.99285   0.99276  11149470\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "       OTHER    0.99124   0.99962   0.99541   5160854\n",
    "         PAD    1.00000   1.00000   1.00000    877767\n",
    "           X    1.00000   1.00000   1.00000   2921053\n",
    "       event    0.99766   0.86900   0.92890    143787\n",
    "         law    0.99837   0.97023   0.98410    146950\n",
    "    location    0.99004   0.98249   0.98625    428869\n",
    "organization    0.99584   0.94088   0.96758    694150\n",
    "      person    0.96062   0.99571   0.97785    507960\n",
    "    quantity    0.99920   0.99976   0.99948     88200\n",
    "        time    0.98851   0.99976   0.99410    179880\n",
    "\n",
    "    accuracy                        0.99319  11149470\n",
    "   macro avg    0.99215   0.97575   0.98337  11149470\n",
    "weighted avg    0.99327   0.99319   0.99309  11149470\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Language Detection\n",
    "\n",
    "Trained on 80% of the dataset, tested on 20% of the dataset. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/language-detection](https://github.com/huseinzol05/Malaya/tree/master/session/language-detection)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### fast-text\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         eng    0.94014   0.96750   0.95362    553739\n",
    "         ind    0.97290   0.97316   0.97303    576059\n",
    "       malay    0.98674   0.95262   0.96938   1800649\n",
    "    manglish    0.96595   0.98417   0.97498    181442\n",
    "       other    0.98454   0.99698   0.99072   1428083\n",
    "       rojak    0.81149   0.91650   0.86080    189678\n",
    "\n",
    "    accuracy                        0.97002   4729650\n",
    "   macro avg    0.94363   0.96515   0.95375   4729650\n",
    "weighted avg    0.97111   0.97002   0.97028   4729650\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Deep learning\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         eng    0.96760   0.97401   0.97080    553739\n",
    "         ind    0.97635   0.96131   0.96877    576059\n",
    "       malay    0.96985   0.98498   0.97736   1800649\n",
    "    manglish    0.98036   0.96569   0.97297    181442\n",
    "       other    0.99641   0.99627   0.99634   1428083\n",
    "       rojak    0.94221   0.84302   0.88986    189678\n",
    "\n",
    "    accuracy                        0.97779   4729650\n",
    "   macro avg    0.97213   0.95421   0.96268   4729650\n",
    "weighted avg    0.97769   0.97779   0.97760   4729650\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### POS Recognition\n",
    "\n",
    "Trained on 80% of the dataset, tested on 20% of the dataset. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/pos](https://github.com/huseinzol05/Malaya/tree/master/session/pos)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         ADJ    0.79261   0.80819   0.80033     45666\n",
    "         ADP    0.95551   0.96155   0.95852    119589\n",
    "         ADV    0.86824   0.83832   0.85302     47760\n",
    "         AUX    0.99362   0.99710   0.99536     10000\n",
    "       CCONJ    0.97639   0.92470   0.94984     37171\n",
    "         DET    0.93663   0.92556   0.93107     38839\n",
    "        NOUN    0.91335   0.89454   0.90385    268329\n",
    "         NUM    0.91883   0.94521   0.93183     41211\n",
    "         PAD    0.98980   1.00000   0.99487    147445\n",
    "        PART    0.91225   0.91291   0.91258      5500\n",
    "        PRON    0.97505   0.94047   0.95745     48835\n",
    "       PROPN    0.91824   0.94054   0.92926    227608\n",
    "       PUNCT    0.99829   0.99853   0.99841    182824\n",
    "       SCONJ    0.76934   0.84297   0.80447     15150\n",
    "         SYM    0.99711   0.95722   0.97676      3600\n",
    "        VERB    0.94284   0.94533   0.94408    124518\n",
    "           X    0.99947   0.99882   0.99914    413549\n",
    "\n",
    "    accuracy                        0.95254   1777594\n",
    "   macro avg    0.93280   0.93129   0.93181   1777594\n",
    "weighted avg    0.95272   0.95254   0.95254   1777594\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         ADJ    0.78068   0.79622   0.78837     45666\n",
    "         ADP    0.95356   0.96107   0.95730    119589\n",
    "         ADV    0.85048   0.83499   0.84266     47760\n",
    "         AUX    0.99502   0.99850   0.99676     10000\n",
    "       CCONJ    0.96900   0.91986   0.94379     37171\n",
    "         DET    0.93853   0.94263   0.94058     38839\n",
    "        NOUN    0.89955   0.89812   0.89883    268329\n",
    "         NUM    0.93685   0.93740   0.93712     41211\n",
    "         PAD    0.99445   1.00000   0.99722    272341\n",
    "        PART    0.91302   0.91418   0.91360      5500\n",
    "        PRON    0.97478   0.93785   0.95596     48835\n",
    "       PROPN    0.92504   0.92239   0.92371    227608\n",
    "       PUNCT    0.99776   0.99815   0.99796    182824\n",
    "       SCONJ    0.75747   0.84376   0.79829     15150\n",
    "         SYM    0.95358   0.90167   0.92690      3600\n",
    "        VERB    0.93816   0.94470   0.94142    124518\n",
    "           X    0.99974   0.99879   0.99926    413549\n",
    "\n",
    "    accuracy                        0.95343   1902490\n",
    "   macro avg    0.92810   0.92649   0.92704   1902490\n",
    "weighted avg    0.95364   0.95343   0.95349   1902490\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         ADJ    0.81706   0.76324   0.78923     45666\n",
    "         ADP    0.95181   0.96143   0.95660    119589\n",
    "         ADV    0.84898   0.84148   0.84521     47760\n",
    "         AUX    0.99502   1.00000   0.99751     10000\n",
    "       CCONJ    0.93370   0.94071   0.93719     37171\n",
    "         DET    0.93324   0.92824   0.93073     38839\n",
    "        NOUN    0.90102   0.89915   0.90008    268329\n",
    "         NUM    0.93291   0.94002   0.93645     41211\n",
    "         PAD    1.00000   1.00000   1.00000    147215\n",
    "        PART    0.91795   0.89909   0.90842      5500\n",
    "        PRON    0.97728   0.93198   0.95409     48835\n",
    "       PROPN    0.91565   0.93866   0.92701    227608\n",
    "       PUNCT    0.99818   0.99890   0.99854    182824\n",
    "       SCONJ    0.79499   0.74330   0.76828     15150\n",
    "         SYM    0.98485   0.90278   0.94203      3600\n",
    "        VERB    0.94143   0.94251   0.94197    124518\n",
    "           X    0.99972   0.99975   0.99973    414899\n",
    "\n",
    "    accuracy                        0.95105   1778714\n",
    "   macro avg    0.93199   0.91948   0.92547   1778714\n",
    "weighted avg    0.95085   0.95105   0.95088   1778714\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         ADJ    0.71343   0.69192   0.70251     45666\n",
    "         ADP    0.94552   0.92892   0.93715    119589\n",
    "         ADV    0.82394   0.77969   0.80120     47760\n",
    "         AUX    0.99502   0.99930   0.99716     10000\n",
    "       CCONJ    0.95223   0.92397   0.93789     37171\n",
    "         DET    0.92886   0.89495   0.91159     38839\n",
    "        NOUN    0.85984   0.87755   0.86860    268329\n",
    "         NUM    0.90365   0.90240   0.90303     41211\n",
    "         PAD    1.00000   1.00000   1.00000    147215\n",
    "        PART    0.88633   0.82509   0.85461      5500\n",
    "        PRON    0.94693   0.93722   0.94205     48835\n",
    "       PROPN    0.90464   0.89602   0.90031    227608\n",
    "       PUNCT    0.98900   0.99757   0.99327    182824\n",
    "       SCONJ    0.70104   0.77234   0.73496     15150\n",
    "         SYM    0.94761   0.86417   0.90397      3600\n",
    "        VERB    0.90093   0.92448   0.91255    124518\n",
    "           X    0.99946   0.99954   0.99950    414899\n",
    "\n",
    "    accuracy                        0.93335   1778714\n",
    "   macro avg    0.90579   0.89501   0.90002   1778714\n",
    "weighted avg    0.93344   0.93335   0.93331   1778714\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         ADJ    0.83194   0.77563   0.80280     45666\n",
    "         ADP    0.96501   0.95786   0.96142    119589\n",
    "         ADV    0.85073   0.84144   0.84606     47760\n",
    "         AUX    0.99502   0.99950   0.99726     10000\n",
    "       CCONJ    0.96564   0.92473   0.94474     37171\n",
    "         DET    0.94985   0.93192   0.94080     38839\n",
    "        NOUN    0.89484   0.92123   0.90784    268329\n",
    "         NUM    0.94009   0.94511   0.94260     41211\n",
    "         PAD    0.99816   1.00000   0.99908    146373\n",
    "        PART    0.91259   0.94345   0.92777      5500\n",
    "        PRON    0.96988   0.94223   0.95586     48835\n",
    "       PROPN    0.93581   0.92557   0.93066    227608\n",
    "       PUNCT    0.99831   0.99933   0.99882    182824\n",
    "       SCONJ    0.73907   0.82376   0.77912     15150\n",
    "         SYM    0.96944   0.96917   0.96930      3600\n",
    "        VERB    0.94517   0.94727   0.94622    124518\n",
    "           X    0.99992   0.99957   0.99975    410749\n",
    "\n",
    "    accuracy                        0.95410   1773722\n",
    "   macro avg    0.93303   0.93222   0.93236   1773722\n",
    "weighted avg    0.95433   0.95410   0.95411   1773722\n",
    "\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "         ADJ    0.79153   0.79396   0.79275     45666\n",
    "         ADP    0.95941   0.96102   0.96021    119589\n",
    "         ADV    0.85117   0.82073   0.83567     47760\n",
    "         AUX    0.99641   0.99860   0.99750     10000\n",
    "       CCONJ    0.96687   0.92793   0.94700     37171\n",
    "         DET    0.91526   0.93156   0.92334     38839\n",
    "        NOUN    0.91155   0.89253   0.90194    268329\n",
    "         NUM    0.92871   0.93635   0.93252     41211\n",
    "         PAD    0.99816   1.00000   0.99908    146373\n",
    "        PART    0.91285   0.92364   0.91821      5500\n",
    "        PRON    0.97040   0.94404   0.95704     48835\n",
    "       PROPN    0.90899   0.94301   0.92569    227608\n",
    "       PUNCT    0.99887   0.99928   0.99908    182824\n",
    "       SCONJ    0.69691   0.86964   0.77375     15150\n",
    "         SYM    0.99941   0.94556   0.97174      3600\n",
    "        VERB    0.95809   0.93052   0.94411    124518\n",
    "           X    0.99985   0.99945   0.99965    410749\n",
    "\n",
    "    accuracy                        0.95109   1773722\n",
    "   macro avg    0.92732   0.93046   0.92819   1773722\n",
    "weighted avg    0.95168   0.95109   0.95121   1773722\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Relevancy\n",
    "\n",
    "Trained on 80% of dataset, tested on 20% of dataset. Link to download dataset available inside the notebooks. All training sessions stored in [session/relevancy](https://github.com/huseinzol05/Malaya/tree/master/session/relevancy)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "not relevant    0.87625   0.73478   0.79930      5946\n",
    "    relevant    0.87117   0.94531   0.90673     11281\n",
    "\n",
    "    accuracy                        0.87264     17227\n",
    "   macro avg    0.87371   0.84004   0.85302     17227\n",
    "weighted avg    0.87293   0.87264   0.86965     17227\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "not relevant    0.95455   0.00353   0.00704      5946\n",
    "    relevant    0.65562   0.99991   0.79197     11281\n",
    "\n",
    "    accuracy                        0.65601     17227\n",
    "   macro avg    0.80508   0.50172   0.39950     17227\n",
    "weighted avg    0.75880   0.65601   0.52104     17227\n",
    "\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "not relevant    0.81807   0.80844   0.81323      5946\n",
    "    relevant    0.89966   0.90524   0.90244     11281\n",
    "\n",
    "    accuracy                        0.87183     17227\n",
    "   macro avg    0.85886   0.85684   0.85783     17227\n",
    "weighted avg    0.87150   0.87183   0.87165     17227\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "not relevant    0.84793   0.66768   0.74708      5946\n",
    "    relevant    0.84249   0.93689   0.88718     11281\n",
    "\n",
    "    accuracy                        0.84397     17227\n",
    "   macro avg    0.84521   0.80228   0.81713     17227\n",
    "weighted avg    0.84437   0.84397   0.83883     17227\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "not relevant    0.85676   0.80272   0.82886      5946\n",
    "    relevant    0.89937   0.92926   0.91407     11281\n",
    "\n",
    "    accuracy                        0.88559     17227\n",
    "   macro avg    0.87806   0.86599   0.87147     17227\n",
    "weighted avg    0.88466   0.88559   0.88466     17227\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "not relevant    0.89878   0.71678   0.79753      5946\n",
    "    relevant    0.86512   0.95745   0.90895     11281\n",
    "\n",
    "    accuracy                        0.87438     17227\n",
    "   macro avg    0.88195   0.83712   0.85324     17227\n",
    "weighted avg    0.87674   0.87438   0.87049     17227\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sentiment Analysis\n",
    "\n",
    "Trained on 80% of dataset, tested on 20% of dataset. Link to download dataset available inside the notebooks. All training sessions stored in [session/sentiment](https://github.com/huseinzol05/Malaya/tree/master/session/sentiment)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### multinomial\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.76305   0.89993   0.82586     15459\n",
    "     neutral    0.81065   0.76562   0.78749     16938\n",
    "    positive    0.76113   0.61208   0.67852      9355\n",
    "\n",
    "    accuracy                        0.78094     41752\n",
    "   macro avg    0.77828   0.75921   0.76396     41752\n",
    "weighted avg    0.78193   0.78094   0.77728     41752\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.95700   0.94722   0.95208     15459\n",
    "     neutral    0.94767   0.94403   0.94585     16938\n",
    "    positive    0.89079   0.91203   0.90128      9355\n",
    "\n",
    "    accuracy                        0.93804     41752\n",
    "   macro avg    0.93182   0.93442   0.93307     41752\n",
    "weighted avg    0.93838   0.93804   0.93817     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.95214   0.95362   0.95288     15459\n",
    "     neutral    0.93852   0.94728   0.94288     16938\n",
    "    positive    0.91104   0.89332   0.90209      9355\n",
    "\n",
    "    accuracy                        0.93754     41752\n",
    "   macro avg    0.93390   0.93141   0.93262     41752\n",
    "weighted avg    0.93741   0.93754   0.93744     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.95209   0.93447   0.94320     15459\n",
    "     neutral    0.93541   0.91575   0.92548     16938\n",
    "    positive    0.84935   0.90764   0.87753      9355\n",
    "\n",
    "    accuracy                        0.92087     41752\n",
    "   macro avg    0.91228   0.91929   0.91540     41752\n",
    "weighted avg    0.92230   0.92087   0.92130     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.92378   0.95731   0.94025     15459\n",
    "     neutral    0.94531   0.90825   0.92641     16938\n",
    "    positive    0.87418   0.88381   0.87897      9355\n",
    "\n",
    "    accuracy                        0.92094     41752\n",
    "   macro avg    0.91442   0.91646   0.91521     41752\n",
    "weighted avg    0.92140   0.92094   0.92090     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.91680   0.97438   0.94471     15459\n",
    "     neutral    0.96408   0.90164   0.93182     16938\n",
    "    positive    0.89083   0.90283   0.89679      9355\n",
    "\n",
    "    accuracy                        0.92884     41752\n",
    "   macro avg    0.92390   0.92629   0.92444     41752\n",
    "weighted avg    0.93016   0.92884   0.92874     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.93771   0.95336   0.94547     15459\n",
    "     neutral    0.95482   0.90949   0.93160     16938\n",
    "    positive    0.86436   0.91480   0.88887      9355\n",
    "\n",
    "    accuracy                        0.92693     41752\n",
    "   macro avg    0.91896   0.92589   0.92198     41752\n",
    "weighted avg    0.92821   0.92693   0.92716     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### fastformer-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.91534   0.92322   0.91926     15459\n",
    "     neutral    0.92113   0.89710   0.90895     16938\n",
    "    positive    0.84189   0.86970   0.85557      9355\n",
    "\n",
    "    accuracy                        0.90063     41752\n",
    "   macro avg    0.89279   0.89667   0.89459     41752\n",
    "weighted avg    0.90123   0.90063   0.90081     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### fastformer-tiny\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.92475   0.92690   0.92583     15459\n",
    "     neutral    0.90404   0.93441   0.91897     16938\n",
    "    positive    0.89086   0.83324   0.86109      9355\n",
    "\n",
    "    accuracy                        0.90896     41752\n",
    "   macro avg    0.90655   0.89819   0.90196     41752\n",
    "weighted avg    0.90875   0.90896   0.90854     41752\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Similarity\n",
    "\n",
    "Trained on 80% of dataset, tested on 20% of dataset. Link to download dataset available inside the notebooks. All training sessions stored in [session/similarity](https://github.com/huseinzol05/Malaya/tree/master/session/similarity)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    " not similar    0.91813   0.86843   0.89259    114935\n",
    "     similar    0.84816   0.90468   0.87551     93371\n",
    "\n",
    "    accuracy                        0.88468    208306\n",
    "   macro avg    0.88315   0.88656   0.88405    208306\n",
    "weighted avg    0.88677   0.88468   0.88493    208306\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    " not similar    0.90845   0.85704   0.88200    114843\n",
    "     similar    0.83576   0.89387   0.86384     93463\n",
    "\n",
    "    accuracy                        0.87357    208306\n",
    "   macro avg    0.87210   0.87546   0.87292    208306\n",
    "weighted avg    0.87583   0.87357   0.87385    208306\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    " not similar    0.88351   0.88549   0.88450    114523\n",
    "     similar    0.85978   0.85743   0.85860     93783\n",
    "\n",
    "    accuracy                        0.87286    208306\n",
    "   macro avg    0.87164   0.87146   0.87155    208306\n",
    "weighted avg    0.87283   0.87286   0.87284    208306\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    " not similar    0.84881   0.82946   0.83902    114914\n",
    "     similar    0.79588   0.81821   0.80689     93392\n",
    "\n",
    "    accuracy                        0.82441    208306\n",
    "   macro avg    0.82234   0.82383   0.82295    208306\n",
    "weighted avg    0.82508   0.82441   0.82461    208306\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    " not similar    0.74384   0.92845   0.82596    114854\n",
    "     similar    0.87347   0.60705   0.71629     93452\n",
    "\n",
    "    accuracy                        0.78426    208306\n",
    "   macro avg    0.80866   0.76775   0.77112    208306\n",
    "weighted avg    0.80200   0.78426   0.77676    208306\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    " not similar    0.89614   0.90170   0.89891    114554\n",
    "     similar    0.87897   0.87231   0.87563     93752\n",
    "\n",
    "    accuracy                        0.88847    208306\n",
    "   macro avg    0.88756   0.88700   0.88727    208306\n",
    "weighted avg    0.88841   0.88847   0.88843    208306\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Subjectivity Analysis\n",
    "\n",
    "Trained on 80% of dataset, tested on 20% of dataset. Link to download dataset available inside the notebooks. All training sessions stored in [session/subjectivity](https://github.com/huseinzol05/Malaya/tree/master/session/subjectivity)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### multinomial\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.91527   0.87238   0.89331      1003\n",
    "    positive    0.87657   0.91818   0.89689       990\n",
    "\n",
    "    accuracy                        0.89513      1993\n",
    "   macro avg    0.89592   0.89528   0.89510      1993\n",
    "weighted avg    0.89605   0.89513   0.89509      1993\n",
    "\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.87825   0.96429   0.91926       980\n",
    "    positive    0.96183   0.87068   0.91399      1013\n",
    "\n",
    "    accuracy                        0.91671      1993\n",
    "   macro avg    0.92004   0.91748   0.91663      1993\n",
    "weighted avg    0.92073   0.91671   0.91658      1993\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.95678   0.84086   0.89508       974\n",
    "    positive    0.86368   0.96369   0.91095      1019\n",
    "\n",
    "    accuracy                        0.90366      1993\n",
    "   macro avg    0.91023   0.90228   0.90301      1993\n",
    "weighted avg    0.90917   0.90366   0.90319      1993\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.87616   0.94006   0.90699      1001\n",
    "    positive    0.93471   0.86593   0.89901       992\n",
    "\n",
    "    accuracy                        0.90316      1993\n",
    "   macro avg    0.90544   0.90299   0.90300      1993\n",
    "weighted avg    0.90531   0.90316   0.90301      1993\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.90070   0.89184   0.89625      1017\n",
    "    positive    0.88844   0.89754   0.89297       976\n",
    "\n",
    "    accuracy                        0.89463      1993\n",
    "   macro avg    0.89457   0.89469   0.89461      1993\n",
    "weighted avg    0.89469   0.89463   0.89464      1993\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.89613   0.94616   0.92047      1003\n",
    "    positive    0.94218   0.88889   0.91476       990\n",
    "\n",
    "    accuracy                        0.91771      1993\n",
    "   macro avg    0.91916   0.91753   0.91761      1993\n",
    "weighted avg    0.91901   0.91771   0.91763      1993\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "    negative    0.89258   0.92604   0.90900       987\n",
    "    positive    0.92466   0.89066   0.90734      1006\n",
    "\n",
    "    accuracy                        0.90818      1993\n",
    "   macro avg    0.90862   0.90835   0.90817      1993\n",
    "weighted avg    0.90877   0.90818   0.90816      1993\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Toxicity Analysis\n",
    "\n",
    "Trained on 80% of dataset, tested on 20% of dataset. Link to download dataset available inside the notebooks. All training sessions stored in [session/toxic](https://github.com/huseinzol05/Malaya/tree/master/session/toxic)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### multinomial\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.32096   0.99468   0.48532      9955\n",
    "                            obscene    0.06031   0.68096   0.11081      2799\n",
    "                    identity attack    0.03312   0.60086   0.06277      1393\n",
    "                             insult    0.15655   0.69002   0.25519     12575\n",
    "                             threat    0.00661   0.11058   0.01247       416\n",
    "                              asian    0.00087   0.01799   0.00166       389\n",
    "                            atheist    0.00137   0.04494   0.00266       178\n",
    "                           bisexual    0.00052   0.08333   0.00104        24\n",
    "                           buddhist    0.00000   0.00000   0.00000        45\n",
    "                          christian    0.13652   0.86153   0.23570      4622\n",
    "                             female    0.12714   0.78073   0.21867      6891\n",
    "                       heterosexual    0.00153   0.06299   0.00299       127\n",
    "                             indian    0.14732   0.97509   0.25597      4014\n",
    "         homosexual, gay or lesbian    0.04442   0.45581   0.08095      1369\n",
    "intellectual or learning disability    0.00000   0.00000   0.00000         6\n",
    "                               male    0.08106   0.58298   0.14233      4947\n",
    "                             muslim    0.07845   0.59531   0.13863      2602\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         2\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000         7\n",
    "                     other religion    0.00000   0.00000   0.00000         8\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         1\n",
    "                physical disability    0.00000   0.00000   0.00000         2\n",
    "      psychiatric or mental illness    0.00720   0.09651   0.01340       601\n",
    "                        transgender    0.00249   0.06608   0.00481       227\n",
    "                              malay    0.54919   0.99337   0.70733     17044\n",
    "                            chinese    0.29545   0.99079   0.45517      8793\n",
    "\n",
    "                          micro avg    0.14989   0.82799   0.25383     79037\n",
    "                          macro avg    0.07597   0.35869   0.11807     79037\n",
    "                       weighted avg    0.25444   0.82799   0.37086     79037\n",
    "                        samples avg    0.07772   0.16003   0.09295     79037\n",
    "  \n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.85194   0.65179   0.73854      9790\n",
    "                            obscene    0.63710   0.41623   0.50351      2847\n",
    "                    identity attack    0.63238   0.29603   0.40328      1412\n",
    "                             insult    0.71381   0.56111   0.62832     12673\n",
    "                             threat    0.56707   0.22850   0.32574       407\n",
    "                              asian    0.54394   0.56965   0.55650       402\n",
    "                            atheist    0.80097   0.96491   0.87533       171\n",
    "                           bisexual    1.00000   0.51852   0.68293        27\n",
    "                           buddhist    0.60938   0.90698   0.72897        43\n",
    "                          christian    0.86376   0.86044   0.86210      4679\n",
    "                             female    0.88242   0.90816   0.89510      6925\n",
    "                       heterosexual    0.67073   0.81481   0.73579       135\n",
    "                             indian    0.95325   0.88580   0.91829      4028\n",
    "         homosexual, gay or lesbian    0.88355   0.92161   0.90218      1416\n",
    "intellectual or learning disability    0.00000   0.00000   0.00000         6\n",
    "                               male    0.75975   0.59414   0.66682      5019\n",
    "                             muslim    0.87416   0.89385   0.88390      2619\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         0\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000        11\n",
    "                     other religion    0.14286   0.09091   0.11111        11\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         0\n",
    "                physical disability    0.00000   0.00000   0.00000         6\n",
    "      psychiatric or mental illness    0.60000   0.81588   0.69148       592\n",
    "                        transgender    0.79012   0.87671   0.83117       219\n",
    "                              malay    0.96219   0.96486   0.96352     16987\n",
    "                            chinese    0.94062   0.90214   0.92098      8727\n",
    "\n",
    "                          micro avg    0.86098   0.77313   0.81469     79152\n",
    "                          macro avg    0.58074   0.54233   0.54909     79152\n",
    "                       weighted avg    0.84966   0.77313   0.80502     79152\n",
    "                        samples avg    0.15924   0.15441   0.15445     79152\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.77495   0.77346   0.77421      9857\n",
    "                            obscene    0.62343   0.41033   0.49492      2788\n",
    "                    identity attack    0.55057   0.34761   0.42616      1378\n",
    "                             insult    0.69412   0.56324   0.62187     12659\n",
    "                             threat    0.60825   0.13170   0.21651       448\n",
    "                              asian    0.66667   0.47478   0.55459       337\n",
    "                            atheist    0.85784   0.92593   0.89059       189\n",
    "                           bisexual    1.00000   0.05263   0.10000        19\n",
    "                           buddhist    0.63043   0.67442   0.65169        43\n",
    "                          christian    0.79541   0.89441   0.84201      4612\n",
    "                             female    0.85257   0.92515   0.88738      6907\n",
    "                       heterosexual    0.67785   0.78295   0.72662       129\n",
    "                             indian    0.94898   0.87673   0.91143      3967\n",
    "         homosexual, gay or lesbian    0.88188   0.92275   0.90185      1424\n",
    "intellectual or learning disability    0.00000   0.00000   0.00000         5\n",
    "                               male    0.70644   0.64640   0.67509      4918\n",
    "                             muslim    0.81178   0.94261   0.87232      2544\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         0\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000         7\n",
    "                     other religion    0.00000   0.00000   0.00000         9\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         2\n",
    "                physical disability    0.00000   0.00000   0.00000         4\n",
    "      psychiatric or mental illness    0.67727   0.76410   0.71807       585\n",
    "                        transgender    0.80090   0.84689   0.82326       209\n",
    "                              malay    0.95652   0.97334   0.96486     16839\n",
    "                            chinese    0.96350   0.88984   0.92521      8869\n",
    "\n",
    "                          micro avg    0.83535   0.79611   0.81526     78748\n",
    "                          macro avg    0.57331   0.51182   0.51773     78748\n",
    "                       weighted avg    0.82603   0.79611   0.80692     78748\n",
    "                        samples avg    0.15765   0.15682   0.15490     78748\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.79715   0.71003   0.75107      9863\n",
    "                            obscene    0.64770   0.38489   0.48285      2780\n",
    "                    identity attack    0.65517   0.27496   0.38736      1382\n",
    "                             insult    0.73404   0.49344   0.59016     12652\n",
    "                             threat    0.68478   0.14754   0.24277       427\n",
    "                              asian    0.67557   0.48361   0.56369       366\n",
    "                            atheist    0.85149   0.91489   0.88205       188\n",
    "                           bisexual    0.93750   0.62500   0.75000        24\n",
    "                           buddhist    0.55556   0.33333   0.41667        45\n",
    "                          christian    0.84738   0.87439   0.86068      4737\n",
    "                             female    0.88191   0.91253   0.89696      6997\n",
    "                       heterosexual    0.76812   0.76812   0.76812       138\n",
    "                             indian    0.92663   0.91164   0.91907      4142\n",
    "         homosexual, gay or lesbian    0.89547   0.92446   0.90973      1390\n",
    "intellectual or learning disability    0.00000   0.00000   0.00000         7\n",
    "                               male    0.73157   0.61368   0.66746      5014\n",
    "                             muslim    0.86958   0.87620   0.87288      2496\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         1\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000        11\n",
    "                     other religion    0.00000   0.00000   0.00000         9\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         1\n",
    "                physical disability    0.00000   0.00000   0.00000         1\n",
    "      psychiatric or mental illness    0.65781   0.72131   0.68810       549\n",
    "                        transgender    0.76995   0.84536   0.80590       194\n",
    "                              malay    0.98510   0.94072   0.96240     16869\n",
    "                            chinese    0.90845   0.95077   0.92913      8694\n",
    "\n",
    "                          micro avg    0.86054   0.76973   0.81261     78977\n",
    "                          macro avg    0.58448   0.50766   0.53137     78977\n",
    "                       weighted avg    0.84634   0.76973   0.79982     78977\n",
    "                        samples avg    0.15569   0.15257   0.15179     78977\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.78533   0.72620   0.75460      9788\n",
    "                            obscene    0.67641   0.33796   0.45072      2808\n",
    "                    identity attack    0.66042   0.22988   0.34104      1379\n",
    "                             insult    0.74085   0.47457   0.57854     12662\n",
    "                             threat    0.52941   0.02153   0.04138       418\n",
    "                              asian    0.65027   0.29975   0.41034       397\n",
    "                            atheist    0.85882   0.82022   0.83908       178\n",
    "                           bisexual    1.00000   0.03125   0.06061        32\n",
    "                           buddhist    0.73333   0.26190   0.38596        42\n",
    "                          christian    0.87017   0.84438   0.85708      4723\n",
    "                             female    0.85865   0.92302   0.88967      6963\n",
    "                       heterosexual    0.76147   0.70339   0.73128       118\n",
    "                             indian    0.93209   0.90115   0.91636      4097\n",
    "         homosexual, gay or lesbian    0.89625   0.89690   0.89658      1387\n",
    "intellectual or learning disability    0.00000   0.00000   0.00000         7\n",
    "                               male    0.68679   0.62619   0.65509      4941\n",
    "                             muslim    0.86187   0.87102   0.86642      2543\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         0\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000         8\n",
    "                     other religion    0.00000   0.00000   0.00000         8\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         0\n",
    "                physical disability    0.00000   0.00000   0.00000         1\n",
    "      psychiatric or mental illness    0.74208   0.57243   0.64631       573\n",
    "                        transgender    0.79327   0.76037   0.77647       217\n",
    "                              malay    0.99392   0.93774   0.96501     16896\n",
    "                            chinese    0.89948   0.96317   0.93024      8770\n",
    "\n",
    "                          micro avg    0.85977   0.76240   0.80816     78956\n",
    "                          macro avg    0.59003   0.45196   0.48121     78956\n",
    "                       weighted avg    0.84448   0.76240   0.79239     78956\n",
    "                        samples avg    0.15465   0.15102   0.15056     78956\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.76274   0.78363   0.77305     10006\n",
    "                            obscene    0.50862   0.52366   0.51603      2874\n",
    "                    identity attack    0.40349   0.52707   0.45707      1404\n",
    "                             insult    0.58435   0.70709   0.63989     12717\n",
    "                             threat    0.29885   0.46547   0.36400       391\n",
    "                              asian    0.41160   0.74425   0.53005       391\n",
    "                            atheist    0.78571   0.96175   0.86486       183\n",
    "                           bisexual    0.54545   0.72000   0.62069        25\n",
    "                           buddhist    0.54054   0.80000   0.64516        50\n",
    "                          christian    0.73638   0.92561   0.82022      4584\n",
    "                             female    0.87304   0.92314   0.89739      6935\n",
    "                       heterosexual    0.70130   0.81818   0.75524       132\n",
    "                             indian    0.92564   0.91477   0.92018      4001\n",
    "         homosexual, gay or lesbian    0.84066   0.93236   0.88414      1375\n",
    "intellectual or learning disability    0.10526   0.50000   0.17391         4\n",
    "                               male    0.71216   0.65484   0.68230      5044\n",
    "                             muslim    0.83993   0.92537   0.88058      2546\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         0\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000         9\n",
    "                     other religion    0.15625   0.71429   0.25641         7\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         0\n",
    "                physical disability    0.05556   0.33333   0.09524         3\n",
    "      psychiatric or mental illness    0.57323   0.86678   0.69008       578\n",
    "                        transgender    0.76557   0.90086   0.82772       232\n",
    "                              malay    0.95376   0.97807   0.96576     17103\n",
    "                            chinese    0.94832   0.90540   0.92636      8837\n",
    "\n",
    "                          micro avg    0.77904   0.83829   0.80758     79431\n",
    "                          macro avg    0.51957   0.64911   0.56246     79431\n",
    "                       weighted avg    0.79150   0.83829   0.81220     79431\n",
    "                        samples avg    0.16354   0.16681   0.16247     79431\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "\n",
    "                                     precision    recall  f1-score   support\n",
    "\n",
    "                       severe toxic    0.81360   0.70005   0.75257      9795\n",
    "                            obscene    0.55653   0.49517   0.52406      2694\n",
    "                    identity attack    0.50581   0.34938   0.41329      1371\n",
    "                             insult    0.68040   0.60330   0.63953     12672\n",
    "                             threat    0.40360   0.35123   0.37560       447\n",
    "                              asian    0.66192   0.49077   0.56364       379\n",
    "                            atheist    0.87151   0.92308   0.89655       169\n",
    "                           bisexual    0.87500   0.60870   0.71795        23\n",
    "                           buddhist    0.60417   0.63043   0.61702        46\n",
    "                          christian    0.88051   0.84715   0.86351      4619\n",
    "                             female    0.86958   0.92578   0.89680      6979\n",
    "                       heterosexual    0.75000   0.82051   0.78367       117\n",
    "                             indian    0.96407   0.87242   0.91596      4029\n",
    "         homosexual, gay or lesbian    0.88719   0.93265   0.90935      1366\n",
    "intellectual or learning disability    0.21429   0.50000   0.30000         6\n",
    "                               male    0.72205   0.65407   0.68638      4897\n",
    "                             muslim    0.82114   0.94992   0.88085      2576\n",
    "                   other disability    0.00000   0.00000   0.00000         0\n",
    "                       other gender    0.00000   0.00000   0.00000         1\n",
    "            other race or ethnicity    0.00000   0.00000   0.00000         9\n",
    "                     other religion    0.00000   0.00000   0.00000         6\n",
    "           other sexual orientation    0.00000   0.00000   0.00000         2\n",
    "                physical disability    0.00000   0.00000   0.00000         1\n",
    "      psychiatric or mental illness    0.57044   0.87589   0.69091       564\n",
    "                        transgender    0.71756   0.88679   0.79325       212\n",
    "                              malay    0.95141   0.97619   0.96364     17051\n",
    "                            chinese    0.92615   0.92417   0.92516      8888\n",
    "\n",
    "                          micro avg    0.83376   0.80221   0.81768     78919\n",
    "                          macro avg    0.56470   0.56732   0.55962     78919\n",
    "                       weighted avg    0.82757   0.80221   0.81282     78919\n",
    "                        samples avg    0.16116   0.15980   0.15799     78919\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Entity Recognition (OntoNotes 5)\n",
    "\n",
    "Trained on 80% of the dataset, tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/entities-ontonotes5](https://github.com/huseinzol05/Malaya/tree/master/session/entities-ontonotes5)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### bert-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "     ADDRESS    0.99858   0.99974   0.99916     93446\n",
    "    CARDINAL    0.93840   0.90631   0.92207     48255\n",
    "        DATE    0.95490   0.93656   0.94564    126548\n",
    "       EVENT    0.92876   0.93591   0.93232      5711\n",
    "         FAC    0.93271   0.92658   0.92964     27392\n",
    "         GPE    0.93437   0.94852   0.94139    101357\n",
    "    LANGUAGE    0.93478   0.96389   0.94911       803\n",
    "         LAW    0.94824   0.95744   0.95281     24834\n",
    "         LOC    0.94148   0.93213   0.93678     34538\n",
    "       MONEY    0.87803   0.87563   0.87683     30032\n",
    "        NORP    0.95516   0.90446   0.92912     57014\n",
    "     ORDINAL    0.91510   0.91083   0.91296      6213\n",
    "         ORG    0.92453   0.95354   0.93881    219533\n",
    "       OTHER    0.99135   0.99308   0.99221   3553350\n",
    "         PAD    0.99956   1.00000   0.99978   1292421\n",
    "     PERCENT    0.96287   0.96814   0.96550     21722\n",
    "      PERSON    0.97376   0.93891   0.95602    101981\n",
    "     PRODUCT    0.87537   0.81769   0.84555     11124\n",
    "    QUANTITY    0.94385   0.92483   0.93424     11614\n",
    "        TIME    0.91912   0.90170   0.91033      9502\n",
    " WORK_OF_ART    0.93126   0.81978   0.87197     13800\n",
    "           X    0.99906   0.99792   0.99849   1350434\n",
    "\n",
    "    accuracy                        0.98821   7141624\n",
    "   macro avg    0.94460   0.93244   0.93822   7141624\n",
    "weighted avg    0.98821   0.98821   0.98818   7141624\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### tiny-bert\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "     ADDRESS    0.99501   0.99981   0.99740     93446\n",
    "    CARDINAL    0.93442   0.87581   0.90416     48255\n",
    "        DATE    0.93723   0.92710   0.93214    126548\n",
    "       EVENT    0.78758   0.93942   0.85682      5711\n",
    "         FAC    0.91859   0.91403   0.91630     27392\n",
    "         GPE    0.92833   0.93455   0.93143    101357\n",
    "    LANGUAGE    0.90220   0.81569   0.85677       803\n",
    "         LAW    0.92771   0.95289   0.94013     24834\n",
    "         LOC    0.92497   0.91983   0.92239     34538\n",
    "       MONEY    0.84986   0.85362   0.85174     30032\n",
    "        NORP    0.93555   0.89741   0.91608     57014\n",
    "     ORDINAL    0.86050   0.92435   0.89129      6213\n",
    "         ORG    0.93290   0.93551   0.93420    219533\n",
    "       OTHER    0.99018   0.99121   0.99070   3553350\n",
    "         PAD    0.99956   1.00000   0.99978   1292421\n",
    "     PERCENT    0.95852   0.96165   0.96008     21722\n",
    "      PERSON    0.93958   0.95846   0.94893    101981\n",
    "     PRODUCT    0.86273   0.77742   0.81786     11124\n",
    "    QUANTITY    0.90690   0.90839   0.90764     11614\n",
    "        TIME    0.89077   0.89339   0.89208      9502\n",
    " WORK_OF_ART    0.83798   0.78145   0.80873     13800\n",
    "           X    0.99872   0.99767   0.99819   1350434\n",
    "\n",
    "    accuracy                        0.98592   7141624\n",
    "   macro avg    0.91908   0.91635   0.91704   7141624\n",
    "weighted avg    0.98590   0.98592   0.98589   7141624\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "     ADDRESS    0.99832   0.99969   0.99901     93446\n",
    "    CARDINAL    0.93291   0.89046   0.91119     48255\n",
    "        DATE    0.94941   0.93032   0.93977    126548\n",
    "       EVENT    0.89330   0.92506   0.90890      5711\n",
    "         FAC    0.92540   0.91257   0.91894     27392\n",
    "         GPE    0.93484   0.93404   0.93444    101357\n",
    "    LANGUAGE    0.87207   0.92528   0.89789       803\n",
    "         LAW    0.95567   0.95140   0.95353     24834\n",
    "         LOC    0.91754   0.92362   0.92057     34538\n",
    "       MONEY    0.85349   0.87696   0.86507     30032\n",
    "        NORP    0.91698   0.91416   0.91557     57014\n",
    "     ORDINAL    0.89159   0.93320   0.91192      6213\n",
    "         ORG    0.95070   0.92537   0.93786    219533\n",
    "       OTHER    0.99000   0.99315   0.99157   3553350\n",
    "         PAD    1.00000   1.00000   1.00000   1291289\n",
    "     PERCENT    0.96746   0.95953   0.96348     21722\n",
    "      PERSON    0.93748   0.96710   0.95206    101981\n",
    "     PRODUCT    0.84749   0.77832   0.81143     11124\n",
    "    QUANTITY    0.92798   0.92079   0.92437     11614\n",
    "        TIME    0.91058   0.88413   0.89716      9502\n",
    " WORK_OF_ART    0.88983   0.77196   0.82671     13800\n",
    "           X    0.99917   0.99799   0.99858   1351758\n",
    "\n",
    "    accuracy                        0.98714   7141816\n",
    "   macro avg    0.93010   0.92341   0.92636   7141816\n",
    "weighted avg    0.98707   0.98714   0.98707   7141816\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### albert-tiny\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "     ADDRESS    0.99322   0.99706   0.99513     93446\n",
    "    CARDINAL    0.89708   0.84719   0.87142     48255\n",
    "        DATE    0.92805   0.90984   0.91886    126548\n",
    "       EVENT    0.82915   0.90247   0.86426      5711\n",
    "         FAC    0.93366   0.84317   0.88611     27392\n",
    "         GPE    0.90096   0.91216   0.90653    101357\n",
    "    LANGUAGE    0.87336   0.74720   0.80537       803\n",
    "         LAW    0.91511   0.91677   0.91594     24834\n",
    "         LOC    0.90699   0.89105   0.89895     34538\n",
    "       MONEY    0.83930   0.84377   0.84152     30032\n",
    "        NORP    0.88694   0.85367   0.86999     57014\n",
    "     ORDINAL    0.82854   0.88041   0.85369      6213\n",
    "         ORG    0.91629   0.89916   0.90764    219533\n",
    "       OTHER    0.98521   0.98937   0.98728   3553350\n",
    "         PAD    1.00000   1.00000   1.00000   1291289\n",
    "     PERCENT    0.95564   0.95898   0.95731     21722\n",
    "      PERSON    0.90445   0.93733   0.92060    101981\n",
    "     PRODUCT    0.79770   0.72456   0.75937     11124\n",
    "    QUANTITY    0.88216   0.86310   0.87252     11614\n",
    "        TIME    0.84045   0.86203   0.85110      9502\n",
    " WORK_OF_ART    0.85491   0.64130   0.73286     13800\n",
    "           X    0.99631   0.99466   0.99549   1351758\n",
    "\n",
    "    accuracy                        0.98068   7141816\n",
    "   macro avg    0.90298   0.88251   0.89145   7141816\n",
    "weighted avg    0.98053   0.98068   0.98054   7141816\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### xlnet-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "     ADDRESS    0.99908   0.99993   0.99950     93446\n",
    "    CARDINAL    0.93228   0.92861   0.93044     48255\n",
    "        DATE    0.95220   0.95546   0.95383    126548\n",
    "       EVENT    0.90646   0.95535   0.93026      5711\n",
    "         FAC    0.94217   0.93432   0.93823     27392\n",
    "         GPE    0.95861   0.94860   0.95358    101357\n",
    "    LANGUAGE    0.91076   0.99128   0.94931       803\n",
    "         LAW    0.93475   0.96392   0.94911     24834\n",
    "         LOC    0.92387   0.94305   0.93336     34538\n",
    "       MONEY    0.85448   0.93027   0.89077     30032\n",
    "        NORP    0.95467   0.92540   0.93981     57014\n",
    "     ORDINAL    0.89995   0.95847   0.92829      6213\n",
    "         ORG    0.94905   0.95571   0.95237    219533\n",
    "       OTHER    0.99394   0.99254   0.99324   3553350\n",
    "         PAD    0.99992   1.00000   0.99996   1292031\n",
    "     PERCENT    0.97215   0.96423   0.96817     21722\n",
    "      PERSON    0.96138   0.97204   0.96668    101981\n",
    "     PRODUCT    0.88197   0.83028   0.85534     11124\n",
    "    QUANTITY    0.93301   0.95695   0.94483     11614\n",
    "        TIME    0.90852   0.91454   0.91152      9502\n",
    " WORK_OF_ART    0.87106   0.88457   0.87776     13800\n",
    "           X    0.99879   0.99898   0.99889   1349384\n",
    "\n",
    "    accuracy                        0.98994   7140184\n",
    "   macro avg    0.93814   0.95021   0.94388   7140184\n",
    "weighted avg    0.99001   0.98994   0.98996   7140184\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### alxlnet-base\n",
    "\n",
    "```text\n",
    "              precision    recall  f1-score   support\n",
    "\n",
    "     ADDRESS    0.99949   0.99981   0.99965     93446\n",
    "    CARDINAL    0.92765   0.91402   0.92078     48255\n",
    "        DATE    0.95309   0.93386   0.94338    126548\n",
    "       EVENT    0.88426   0.93241   0.90770      5711\n",
    "         FAC    0.92367   0.92991   0.92678     27392\n",
    "         GPE    0.93880   0.95315   0.94592    101357\n",
    "    LANGUAGE    0.84296   0.90909   0.87478       803\n",
    "         LAW    0.95472   0.95091   0.95281     24834\n",
    "         LOC    0.92551   0.92953   0.92751     34538\n",
    "       MONEY    0.86719   0.87403   0.87060     30032\n",
    "        NORP    0.95470   0.89518   0.92398     57014\n",
    "     ORDINAL    0.86582   0.94721   0.90469      6213\n",
    "         ORG    0.95300   0.93138   0.94206    219533\n",
    "       OTHER    0.99080   0.99334   0.99206   3553350\n",
    "         PAD    0.99992   1.00000   0.99996   1292031\n",
    "     PERCENT    0.96856   0.96851   0.96853     21722\n",
    "      PERSON    0.94616   0.96716   0.95655    101981\n",
    "     PRODUCT    0.87820   0.79333   0.83361     11124\n",
    "    QUANTITY    0.94752   0.91872   0.93290     11614\n",
    "        TIME    0.90322   0.90949   0.90635      9502\n",
    " WORK_OF_ART    0.88971   0.79732   0.84098     13800\n",
    "           X    0.99883   0.99891   0.99887   1349384\n",
    "\n",
    "    accuracy                        0.98816   7140184\n",
    "   macro avg    0.93244   0.92942   0.93047   7140184\n",
    "weighted avg    0.98810   0.98816   0.98810   7140184\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
